VPNconnect: vpn.uoregon.edu, login ID

Mount to Talapas to access raw data (via Terminal Window)

#sshfs :/projects/niell/dniell /Users/deniseniell/talapas_dniell #cd /Users/deniseniell/talapas_dniell/cellranger

Open Rmd file (the following commands will be in RStudio)

Set your working directory path

#setwd("/Users/deniseniell/Desktop/Seurat/run2")
rm(list = ls())

Load libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Note: you might get an error when loading dplyr but if you just run the command again, then it will load and everything should work well
library(Seurat)
library(Matrix)
library(ggplot2)
library(sctransform)
library(stringr)

Load the Octo Seq raw data ~2m

Keep track of which data set you are working with. The code is written so that you can run through the pipeline without having to worry about changing variables (i.e. data sets are imported to the variable “all” so that all the following commands will process the data without you needing to change the variable each time)

all <- Read10X(data.dir = "D:/data/octo seq/Cellranger/OctoSeq2.1/raw_feature_bc_matrix")

replace gene names in raw data with IDs from ccsv file

takes a few minutes

ref <- read.csv("D:/data/octo seq/refMaster_040420.csv",stringsAsFactors=FALSE)

ngenes <- length(all@Dimnames[[1]])
for (g in 1:ngenes){
  gene<-all@Dimnames[[1]][g]
  gene<-substr(gene,6,str_length(gene)-2)
  ind<-grep(gene,ref[[1]])
  if (length(ind)>0) {
    id <- ref[[ind[1],2]]
    if (str_length(id)>0) {
      id <- str_remove_all(id,"\\(") # parentheses mess up gene names as dimensions
      id <- str_remove_all(id,"\\)")
      id <- substr(id,1,60) # keep it short
      all@Dimnames[[1]][g]<- paste(id,gene,sep='-')
    }
  }
}

option to read in RDS file here

#all <- readRDS(file = "/Users/deniseniell/Desktop/Seurat/run2/OSmarkersTree.rds")

Initialize the Seurat object with the raw (non-normalized data).

#Change the project name here so that you can keep track of your data

all <- CreateSeuratObject(counts = all, project = "OctoSeq2_names", min.cells = 3, min.features = 200)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')

Begin Quality Control Steps

mito.genes <- grep(pattern = "^mt-", x = rownames(x = all), value = TRUE)
percent.mito <- Matrix::colSums(all) / Matrix::colSums(all)
all[["percent.mt"]] <- PercentageFeatureSet(all, pattern = "^MT-")
all <- subset(all, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 & nCount_RNA>1000)
plot1 <- FeatureScatter(all, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(all, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))

Number of unique genes and total moleculares are automatically calculated during CreateSeuratObject, but you can view these metrics because they are stored in the object meta data

head(all@meta.data, 5)
##                      orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCCAAGCATTGTC OctoSeq2_names       3058         1639          0
## AAACCCAAGGTATCTC OctoSeq2_names       1462          997          0
## AAACCCACAATTTCTC OctoSeq2_names       2403         1510          0
## AAACCCACACAGTGTT OctoSeq2_names       3276         1800          0
## AAACCCACACGGGTAA OctoSeq2_names       4402         1854          0
median(all@meta.data$nCount_RNA)
## [1] 1858
median(all@meta.data$nFeature_RNA)
## [1] 1141
length(all@meta.data$nCount_RNA)
## [1] 11169

You can also visualize these QC metrics and use this to decide how to filter cells

VlnPlot(all, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol=3)

# Begin normalizing the data

all <- NormalizeData(all, normalization.method = "LogNormalize", scale.factor = 10000)

You can also use the following command and you should receive the same output

#all <- NormalizeData(all)

Identification of highly variable features (feature selection), and plot these features

all <- FindVariableFeatures(all, selection.method = "vst", nfeatures = 2000)

Identify the 10 most highly variable genes

top10 <- head(VariableFeatures(all),10)

Plot variable features with and without labels

plot3 <- VariableFeaturePlot(all)
plot4 <- LabelPoints(plot = plot3, points = top10, repel = TRUE)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
## When using repel, set xnudge and ynudge to 0 for optimal results
#CombinePlots(plots = list(plot3, plot4))

Apply a linear transformation (scaling), which is a standard pre-processing step prior to dimensional reduction techniques like PCA ~2m

all.genes <- rownames(all)
all <- ScaleData(all, features = all.genes)
## Centering and scaling data matrix

Perform linear dimensional reduction (PCA) ~15-20m

I normally will create a new variable/Seurat object at this point and rename it as “all.norm” so that I know that I have all of the preprocesed data prior to running PCA, and I can manipulate the PCs etc moving forward but still be able to go back to the processed data easily

all.norm <- RunPCA(all, features = VariableFeatures(object = all), npcs = 100)
## PC_ 1 
## Positive:  No hits-Ocbimv22033271m, Scavenger receptor cysteine rich domain lysyl oxidase like 3-Ocbimv22009850m, No hits-Ocbimv22027477m, gene:Ocbimv22016173m.g, No hits squid hit-Ocbimv22021234m, No hits-Ocbimv22032142m, No hits-Ocbimv22032145m, Matrixin metallopeptidase-Ocbimv22031750m, Cytochrome C somatic-Ocbimv22015949m, No hits squid hit-Ocbimv22006272m 
##     PLAC8-family-Ocbimv22016971m, von Willebrand factor-Ocbimv22011288m, AChE-Ocbimv22038398m, No hits squid hit-Ocbimv22039757m, Actin-Ocbimv22013417m, No hits-Ocbimv22016862m, No hits squid hit-Ocbimv22006708m, Thyroglobulin-Ocbimv22039703m, methylmalonic-aciduria-cobalamin-deficiency-cblC-type-with-h-Ocbimv22037026m, Astacin ShK domain like tolloid like-Ocbimv22025489m 
##     gene:Ocbimv22021233m.g, Dual-specificity-phosphatase-catalytic-domain--Protein-tyros-Ocbimv22001154m, No hits-Ocbimv22037232m, kyphoscoliosis-peptidase-Ocbimv22013362m, gene:Ocbimv22033753m.g, Calmodulin-Ocbimv22019794m, gene:Ocbimv22019749m.g, gene:Ocbimv22022617m.g, No hits-Ocbimv22012929m, Tetraspanin-Ocbimv22008510m 
## Negative:  Neuroendocrine hormone precursor-Ocbimv22007151m, EF-hand--EF-hand------EF-hand---calmodulin-1-phosphorylase-k-Ocbimv22032213m, gene:Ocbimv22001092m.g, Kazal-type-serine-protease-inhibitor-domain---Immunoglobulin-Ocbimv22029399m, gene:Ocbimv22000523m.g, gene:Ocbimv22009865m.g, Unknown function-Ocbimv22030969m, gene:Ocbimv22022323m.g, LICD protein family-Ocbimv22030344m, Subtilase-family-Proprotein-convertase-P-domain-proprotein-c-Ocbimv22033366m 
##     No hits-Ocbimv22005243m, Voltage Gated Potassium Channel-Ocbimv22006414m, No hits-Ocbimv22024117m, Lamin intermediate filament protein-Ocbimv22000068m, gene:Ocbimv22019384m.g, gene:Ocbimv22031183m.g, Copper type 2 dependent ascorbate dependent monoxygenase-Ocbimv22015079m, gene:Ocbimv22009322m.g, gene:Ocbimv22000524m.g, No hit-Ocbimv22011989m 
##     No hit squid hit-Ocbimv22001854m, CUB-domain-CUB-domain-containing-protein-2-Ocbimv22012935m, Caprin family member-Ocbimv22030377m, Cadherin-Ocbimv22025965m, Collagen-triple-helix-repeat-20-copies-Collagen-triple-helix-Ocbimv22039751m, No hits squid and invert hits-Ocbimv22028115m, gene:Ocbimv22001085m.g, No hits at all-Ocbimv22024372m, Universal-stress-protein-family-Ocbimv22006200m, Fibronectin-Ocbimv22039573m 
## PC_ 2 
## Positive:  Low density lipoprotein receptor-Ocbimv22000216m, Collagen triple helix repeat-Ocbimv22035245m, Insulin-like growth factor binding protein-Ocbimv22024561m, Collagen-triple-helix-repeat-20-copies-Collagen-triple-helix-Ocbimv22031313m, Collagen-triple-helix-repeat-20-copies-Collagen-triple-helix-Ocbimv22023946m, gene:Ocbimv22030005m.g, No hits squid and invert hits-Ocbimv22037269m, No hits-Ocbimv22024563m, Collagen triple helix repeat-Ocbimv22027357m, collagen-type-IV-alpha-3-Goodpasture-antigen-Ocbimv22032747m 
##     Eukaryotic type carbonic anhydrase-Ocbimv22023622m, Aquaporin-Ocbimv22038019m, No hits squid hit-Ocbimv22006273m, No hits squid hit-Ocbimv22006271m, Tetraspanin family molecule-Ocbimv22038024m, Ankyrin repeat/Notch?-Ocbimv22007283m, Cysteine-rich-secretory-protein-family-GLI-pathogenesis-rela-Ocbimv22013028m, Fibronectin-Ocbimv22011001m, 7TMR latrophil?-Ocbimv22005132m, EGFNotch?-Ocbimv22021827m 
##     Innexin-Ocbimv22016460m, Myosin light chain-Ocbimv22026600m, Vault-protein-inter-alpha-trypsin--von-Willebrand-factor-typ-Ocbimv22016107m, No hits-Ocbimv22006768m, FERM-N-terminal-domain-FERM-central-domain-protein-tyrosine--Ocbimv22016138m, C2H2-Ocbimv22012776m, Hsp20/alpha-crystallin-family-heat-shock-27kDa-protein-1-Ocbimv22036440m, No hits squid hit-Ocbimv22000497m, Eukaryotic-type-carbonic-anhydrase-Ocbimv22023623m, Immunoglobulin-I-set-domain-Immunoglobulin-V-set-domain--Imm-Ocbimv22012633m 
## Negative:  von Willebrand factor-Ocbimv22012241m, No hits squid hit-Ocbimv22018450m, No hits squid hit-Ocbimv22018451m, Neuroendocrine hormone precursor-Ocbimv22007151m, No hits squid hit-Ocbimv22021232m, Polysaccharide deacetylase-Ocbimv22030571m, von Willebrand factor-Ocbimv22011288m, GPCR Rhodopsin family but not one of our opsins-Ocbimv22017246m, methylmalonic-aciduria-cobalamin-deficiency-cblC-type-with-h-Ocbimv22037026m, Beta lactamase-Ocbimv22037424m 
##     gene:Ocbimv22032148m.g, GPCR Rhodopsin family but not one of our opsins-Ocbimv22001500m, No hits-Ocbimv22032137m, Putative peptidoglycan Mmp1-Ocbimv22002743m, Multicopper-oxidase-hephaestin-Ocbimv22036318m, Matrixin metallopeptidase-Ocbimv22031751m, EB-module-EB-module-Protein-tyrosine-phosphatase-Dual-specif-Ocbimv22033209m, Thyroglobulin-type-1-repeat-Ocbimv22033536m, No hits squid hit-Ocbimv22006708m, No hits squid hit-Ocbimv22006272m 
##     No hits-Ocbimv22002030m, Astacin ShK domain like tolloid like-Ocbimv22016617m, No hits-Ocbimv22032145m, Scavenger receptor cysteine rich domain lysyl oxidase like 3-Ocbimv22009850m, Innexin-Ocbimv22027764m, Multicopper-oxidase-hephaestin-like-1-Ocbimv22013232m, Multicopper oxidase hepaestin-Ocbimv22036319m, Galactoside-binding-lectin-Galactoside-binding-lectin-Galact-Ocbimv22037980m, Cytochrome C somatic-Ocbimv22015949m, SOUL-heme-binding-protein-Ocbimv22031901m 
## PC_ 3 
## Positive:  von-Willebrand-factor-type-A-domain--Ocbimv22013060m, linker-histone-H1-and-H5-family-H1-histone-family-member-0-Ocbimv22034699m, RhoGAP-domain-Ocbimv22012861m, linker-histone-H1-and-H5-family-H1-histone-family-member-0-Ocbimv22034700m, Core-histone-H2A/H2B/H3/H4-histone-cluster-2-H3d-Ocbimv22001065m, gene:Ocbimv22032139m.g, nucleolar-and-spindle-associated-protein-1-Ocbimv22039446m, Core-histone-H2A/H2B/H3/H4-Histone-like-transcription-factor-Ocbimv22004229m, von-Willebrand-factor-type-A-domain-Ocbimv22013063m, Protein-kinase-domain-Protein-tyrosine-kinase--POLO-box-dupl-Ocbimv22028963m 
##     Cell-cycle-regulated-microtubule-associated-protein-TPX2-mic-Ocbimv22033524m, Protein-kinase-domain-Protein-tyrosine-kinase--aurora-kinase-Ocbimv22010207m, Cathepsin-Ocbimv22009822m, Kinesin-motor-domain-Tesmin/TSO1-like-CXC-domain-kinesin-fam-Ocbimv22002108m, Core-histone-H2A/H2B/H3/H4-Histone-like-transcription-factor-Ocbimv22004225m, gene:Ocbimv22000757m.g, gene:Ocbimv22011688m.g, Rad51--recA-bacterial-DNA-recombination-protein-KaiC-RAD51-h-Ocbimv22024630m, linker-histone-H1-and-H5-family-H1-histone-family-member-0-Ocbimv22026507m, Importin-beta-binding-domain-Armadillo/beta-catenin-like-rep-Ocbimv22005104m 
##     Kinesin-motor-domain-centromere-protein-E-312kDa-Ocbimv22025596m, Nbl1-/-Borealin-N-terminal-Cell-division-cycle-associated-pr-Ocbimv22026418m, Core-histone-H2A/H2B/H3/H4-H2A-histone-family-member-V-Ocbimv22011053m, Protein-kinase-domain-Protein-tyrosine-kinase-Ocbimv22003693m, RhoGAP-domain-Rho-GTPase-activating-protein-19-Ocbimv22035962m, gene:Ocbimv22009747m.g, Timeless-protein-Timeless-protein-C-terminal-region-timeless-Ocbimv22004280m, Inhibitor-of-Apoptosis-domain-baculoviral-IAP-repeat-contain-Ocbimv22009148m, gene:Ocbimv22005812m.g, Penicillinase-repressor-Mnd1-family-meiotic-nuclear-division-Ocbimv22026378m 
## Negative:  No hits squid hit-Ocbimv22006271m, Insulin-like growth factor binding protein-Ocbimv22024561m, Low density lipoprotein receptor-Ocbimv22000216m, Collagen triple helix repeat-Ocbimv22035245m, Collagen-triple-helix-repeat-20-copies-Collagen-triple-helix-Ocbimv22031313m, Eukaryotic type carbonic anhydrase-Ocbimv22023622m, No hits-Ocbimv22024563m, Aquaporin-Ocbimv22038019m, Ankyrin repeat/Notch?-Ocbimv22007283m, Collagen-triple-helix-repeat-20-copies-Collagen-triple-helix-Ocbimv22023946m 
##     von-Willebrand-factor-type-A-domain--GCC2-and-GCC3-Sushi-dom-Ocbimv22038259m, Stanniocalcin-family-Ocbimv22031945m, Collagen triple helix repeat-Ocbimv22027357m, No hits squid and invert hits-Ocbimv22037269m, collagen-type-IV-alpha-3-Goodpasture-antigen-Ocbimv22032747m, EGFNotch?-Ocbimv22021827m, Cysteine-rich-secretory-protein-family-GLI-pathogenesis-rela-Ocbimv22013028m, Vault-protein-inter-alpha-trypsin--von-Willebrand-factor-typ-Ocbimv22016107m, No hits squid hit-Ocbimv22006273m, 7TMR latrophil?-Ocbimv22005132m 
##     Perilipin-Ocbimv22023930m, von Willebrand factor-Ocbimv22012241m, AchR-Ocbimv22006182m, Myosin light chain-Ocbimv22026600m, No hits-Ocbimv22006768m, No hits squid hit-Ocbimv22018451m, No hits squid hit-Ocbimv22018450m, C2H2-Ocbimv22012776m, Plexin-Ocbimv22006407m, Laminin-N-terminal-Domain-VI-Ocbimv22034377m 
## PC_ 4 
## Positive:  gene:Ocbimv22035787m.g, BTG-family-B-cell-translocation-gene-1-anti-proliferative-Ocbimv22024928m, 60s-Acidic-ribosomal-protein-ribosomal-protein-large-P1-Ocbimv22024382m, Cadherin-domain-Cadherin-domain-Cadherin-domain-Cadherin-dom-Ocbimv22039316m, slit-homolog-2-Drosophila-Ocbimv22018812m, Core-histone-H2A/H2B/H3/H4-H2A-histone-family-member-V-Ocbimv22011053m, Zinc-finger-C2H2-type--zinc-finger-protein-285B-pseudogene-Ocbimv22037537m, Ribosomal-protein-S5-N-terminal-domain-Ribosomal-protein-S5--Ocbimv22018401m, Sema-domain-Plexin-repeat-IPT/TIG-domain-IPT/TIG-domain-IPT/-Ocbimv22006562m, Protein-of-unknown-function-DUF1151-family-with-sequence-sim-Ocbimv22016015m 
##     Nucleoside-diphosphate-kinase-NME1-NME2-readthrough-Ocbimv22012462m, bZIP-transcription-factor-Basic-region-leucine-zipper-bZIP-M-Ocbimv22024895m, Cadherin-like-Cadherin-domain-Cadherin-domain-Cadherin-domai-Ocbimv22020837m, gene:Ocbimv22032467m.g, Ras-family-Miro-like-protein-RAB14-member-RAS-oncogene-famil-Ocbimv22023215m, linker-histone-H1-and-H5-family-H1-histone-family-member-0-Ocbimv22026504m, Rhodanese-like-domain-Dual-specificity-phosphatase-catalytic-Ocbimv22013786m, Thymosin-beta-4-family-Thymosin-beta-4-family-Thymosin-beta--Ocbimv22038101m, GPCR-Ocbimv22039826m, Zinc-finger-C2H2-type--zinc-finger-protein-64-homolog-mouse-Ocbimv22009787m 
##     RNA-recognition-motif.-a.k.a.-RRM-RBD-or-RNP-domain--RNA-rec-Ocbimv22030918m, Cysteine Sulfinic Acid Decarboxylase-Ocbimv22004812m, Ribosomal-protein-L10-60s-Acidic-ribosomal-protein-ribosomal-Ocbimv22035130m, Beta Tubulin-Ocbimv22029847m, Ergosterol-biosynthesis-ERG4/ERG24-family-lamin-B-receptor-Ocbimv22003728m, 60s-Acidic-ribosomal-protein-ribosomal-protein-large-P2-Ocbimv22010428m, Ribosomal-protein-S10p/S20e-ribosomal-protein-S20-Ocbimv22005123m, Aquaporin-Ocbimv22022904m, CUB-domain-Ocbimv22036947m, Ribosomal-protein-S2-Ribosomal-protein-S2-ribosomal-protein--Ocbimv22002200m 
## Negative:  Neuroendocrine hormone precursor-Ocbimv22007151m, gene:Ocbimv22031183m.g, LICD protein family-Ocbimv22030344m, Carbohydrate-phosphorylase-phosphorylase-glycogen;-brain-Ocbimv22020127m, gene:Ocbimv22000523m.g, Subtilase-family-Proprotein-convertase-P-domain-proprotein-c-Ocbimv22033366m, EF-hand--EF-hand------EF-hand---calmodulin-1-phosphorylase-k-Ocbimv22032213m, gene:Ocbimv22022323m.g, No hit squid hit-Ocbimv22001854m, gene:Ocbimv22000524m.g 
##     Copper type 2 dependent ascorbate dependent monoxygenase-Ocbimv22015079m, Lamin intermediate filament protein-Ocbimv22000068m, gene:Ocbimv22001092m.g, VMAT A-Ocbimv22031489m, KH-domain-KH-domain-Tudor-domain-tudor-and-KH-domain-contain-Ocbimv22032337m, Zinc-finger-C3HC4-type-RING-finger--Ocbimv22009276m, Kazal-type-serine-protease-inhibitor-domain---Immunoglobulin-Ocbimv22029399m, gene:Ocbimv22012879m.g, gene:Ocbimv22001641m.g, Fibrinogen like-Ocbimv22002134m 
##     gene:Ocbimv22024976m.g, Immunoglobulin-V-set-domain-CD80-like-C2-set-immunoglobulin--Ocbimv22019705m, Cadherin-Ocbimv22025965m, gene:Ocbimv22024839m.g, lactate/malate-dehydrogenase-NAD-binding-domain-lactate/mala-Ocbimv22001700m, gene:Ocbimv22005441m.g, No hits at all-Ocbimv22024372m, Helix-loop-helix-DNA-binding-domain-Ocbimv22004730m, Sulfotransferase Tango13 Transport-Ocbimv22015376m, Fas-apoptotic-inhibitory-molecule-FAIM1-Ocbimv22013823m 
## PC_ 5 
## Positive:  No hit one weak to Voltage dependent calcium channel-Ocbimv22022356m, Citron Rho interacting serine/threonine kinase-Ocbimv22027688m, TH-Ocbimv22017369m, No hit Squid hit-Ocbimv22014258m, No hit-Ocbimv22026896m, Voltage Gated Potassium Channel-Ocbimv22005261m, No hit Squid hit-Ocbimv22036586m, No hits-Ocbimv22022111m, Potassium channel TWiK family-Ocbimv22011680m, Voltage Gated Potassium Channel-Ocbimv22000184m 
##     gene:Ocbimv22027685m.g, Kazal-type-serine-protease-inhibitor-domain-Kazal-type-serin-Ocbimv22017255m, Dopa-decarboxylase? DBH-Ocbimv22022526m, Calcium Activated Potassium Channel-Ocbimv22029661m, DAT-Ocbimv22026818m, Copper type 2 dependent ascorbate dependent monoxygenase DBH-Ocbimv22029227m, SLC7/9?-Ocbimv22014656m, Universal-stress-protein-family-Ocbimv22006200m, Deoxyribonuclease-Ocbimv22015857m, No hit-Ocbimv22017371m 
##     FMRF related peptide-Ocbimv22023842m, No hit Squid hit-Ocbimv22033868m, No hit Squid hit-Ocbimv22007770m, Universal stress protein family-Ocbimv22023291m, Afadin-Ocbimv22006635m, No hit Squid hit-Ocbimv22018791m, family-with-sequence-similarity-43-member-A-Ocbimv22013761m, Acid Sensing Ion Channel-Ocbimv22006431m, Calponin-homology-CH-domain-Calponin-homology-CH-domain-Spec-Ocbimv22004991m, FRAS1-related-extracellular-matrix-protein-2-Ocbimv22026849m 
## Negative:  Ribosomal-protein-S5-N-terminal-domain-Ribosomal-protein-S5--Ocbimv22018401m, Profilin-Ocbimv22036924m, Nucleoside-diphosphate-kinase-NME1-NME2-readthrough-Ocbimv22012462m, K+-channel-tetramerisation-domain-potassium-channel-tetramer-Ocbimv22001228m, gene:Ocbimv22035787m.g, VMAT A-Ocbimv22031489m, Ribosomal-protein-L11-N-terminal-domain-Ribosomal-protein-L1-Ocbimv22027712m, VACHT-Ocbimv22001681m, Ribosomal-protein-L13e-ribosomal-protein-L13-Ocbimv22002209m, 60s-Acidic-ribosomal-protein-ribosomal-protein-large-P2-Ocbimv22010428m 
##     RHO-protein-GDP-dissociation-inhibitor-Rho-GDP-dissociation--Ocbimv22014986m, Ribosomal-S3Ae-family-ribosomal-protein-S3a-pseudogene-47-Ocbimv22032319m, Ephrin-Ocbimv22026748m, gene:Ocbimv22027278m.g, Ribosomal-protein-L3-ribosomal-protein-L3-Ocbimv22027429m, Ribosomal-protein-S10p/S20e-ribosomal-protein-S20-Ocbimv22005123m, Helix-loop-helix-DNA-binding-domain-Ocbimv22004730m, Ribosomal-protein-L14-Ocbimv22009408m, Ribosomal-protein-S8-ribosomal-protein-S15a-Ocbimv22037875m, Redoxin-AhpC/TSA-family-C-terminal-domain-of-1-Cys-peroxired-Ocbimv22020867m 
##     Core-histone-H2A/H2B/H3/H4-H2A-histone-family-member-V-Ocbimv22011053m, Ribosomal-protein-S2-Ribosomal-protein-S2-ribosomal-protein--Ocbimv22002200m, Ribosomal-L29e-protein-family-ribosomal-protein-L29-Ocbimv22001580m, gene:Ocbimv22001092m.g, LIM-domain-LIM-domain-LIM-domain-only-1-rhombotin-1-Ocbimv22011880m, KIAA1598-Ocbimv22011933m, Fas-apoptotic-inhibitory-molecule-FAIM1-Ocbimv22013823m, ChAT-Ocbimv22001674m, Collagen-triple-helix-repeat-20-copies-Collagen-triple-helix-Ocbimv22031313m, Copine-Ocbimv22038284m

save data at this point just in case RStudio crashes!

#saveRDS(all.norm, file = #"/Users/deniseniell/Desktop/Seurat/run2/OSr2norm200.rds")

Visualize PCAs in a few different ways

You can skip the next few lines of code (visualization of PCs, elbow and jackstraw) if you know how many pcs you want to move forward with.

DimHeatmap(all.norm, dims = 1, cells = 500, balanced = TRUE)

DimHeatmap(all.norm, dims = 1:10, cells = 500, balanced = TRUE)

VizDimLoadings(all.norm, dims = 1:2, reduction = "pca")

DimPlot(all.norm, reduction = "pca")

# Seurat clusters cells based on their PCA scores, with each PC essentially representing a “metafeature” that combines information across a correlated feature set. Top principle components represent robust compression of the dataset. To determine how many components to include, one can utilize a resampling test inspired by the JackStraw procedure.

Use the ElbowPlot function before JackStraw to visualize and save on computing power since the JackStraw method can take a long time for processing big datasets, others rely on the ElbowPlot function to reduce computation time and still produce an approximation.

ElbowPlot(all.norm, ndims = 200)
## Warning in ElbowPlot(all.norm, ndims = 200): The object only has information for
## 100 reductions

This sampling strategy randomly permutes a subset of the data (1% is the default) ad erun PCA, constructing a “null distribution” of feature scores, and repeat this procedure. This allows an identification of “significant” PCs as those who have a strong enrichment of low p-value features.

The JackStrawPlot function provides a visualization tool for comparing the distribution of p-values for each PC with a uniform distribution (indicated by the dashed line). “Significant” PCs will show a strong enrichment of features with low p-values (indicated by the solid curve above the dashed line). ~30m

##Note: even if you run 200pcs, the max # of dims you can use for ScoreJackStraw and JackStrawPlot is 20.

#all.norm <- JackStraw(all.norm, num.replicate = 100)
#all.norm <- ScoreJackStraw(all.norm, dims = 1:20)
#JackStrawPlot(all.norm, dims = 1:20)

Cluster the cells

Seurat v3 applies a graph-based clustering approach. Briefly, these methods embed cells in a graph structure - for example a K-nearest neighbor (KNN) graph, with edges drawn between cells with similar feature expression patterns, and then attempt to partition this graph into highly interconnected “quasi-cliques” or “communities”

(like PhenoGraph), a KNN graph is constructed based on the elucidean distance in PCA space. Then, the FindNeighbors function refines edge weights between any two cells based on the shared overlap in their local neighborhoods (this is referred to as Jaccard similarity). This function takes the previously defined input regarding number of PCs (now, dims)

To cluster cells, Seurat applies a modularity optimization technique, such as Louvain algorithm (which is the default) or SLM, to iteratively group cells together, with the goal of optimizing the standard modularity function.

FindClusters implements the procedure mentioned above and contains a resolution parameter that sets “granularity” of downstream clustering, with increased values leading to a greater number of clusters. Typically, setting the parameter between 0.4-1.2 typically returns good results for single-cell datasets of around 3K cells. Clusters could be found using the Idents function.

##Note: even if you only run 20 dims with the JackStraw function above, looks like you can still proceed with >20 dims in the following commands ~5m

all.norm <- FindNeighbors(all.norm, dims = 1:20) # changed this from 200, to be consistent with FindClusters next
## Computing nearest neighbor graph
## Computing SNN
all.norm <- FindClusters(all.norm, reduction.type = "pca", dims = 1:20, resolution = 1)
## Warning: The following arguments are not used: reduction.type, dims
## Suggested parameter: reduction instead of reduction.type
## Warning: The following arguments are not used: reduction.type, dims
## Suggested parameter: reduction instead of reduction.type
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11169
## Number of edges: 389925
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8434
## Number of communities: 22
## Elapsed time: 1 seconds
##Notes: 200dims, res 1 yields 18 singletons and 15 final clusters; 200dims, res 0.5 yields 18 singletons and 12 final clusters; 200dims, res 1.5 yields 29 singletons and 68 final clusters; 200dims, res 0.2 yields 18 singletons and 9 final clusters; 150dims, res 1 yields 18 singletons and 15 final clusters; 100dims, res 1 yields 18 singletons and 15 final clusters; 50dims, res 1 yields 18 singletons and 15 final clusters; 50dims, res 0.5 yields 18 singletons and 12 clusters. --> let's stick with 50dims at res 1 for now and revisit FindNeighbors parameters to address singleton issue
##Notes: Neighbors = 200 dims, Clusters = 50dims, at res 0.05 there are 18 singletons and 9 final clusters; Neighbors = 200, Clusters = 50dims, at res 0.01 there are 18 singletons and 6 final clusters

## Additional notes: for OctoSeq2, 50dims res 1 yields 1 singleton and 21 final clusters.

Look at cluster IDS of the first 5 cells

head(Idents(all.norm), 5)
## AAACCCAAGCATTGTC AAACCCAAGGTATCTC AAACCCACAATTTCTC AAACCCACACAGTGTT 
##                6                2               12                5 
## AAACCCACACGGGTAA 
##               15 
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21

Run non-linear dimensional reduction (UMAP/tSNE)

Non-linear dimensional reduction techniques, such as tSNE and UMAP, allows one to visualize and explore the datasets. The goal of these algorithms is to learn the underlying manifold of the data in order to place similar cells together in a low-dimensional space. Cells within the graph-based clusters determined above should co-localize on these dimension reduction plots. Seurat suggests using the same PCs as what was provided as input to the clustering analysis.

all.norm <- RunUMAP(all.norm, dims = 1:20) # was 1:20 dims
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:45:26 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:45:26 Read 11169 rows and found 20 numeric columns
## 15:45:26 Using Annoy for neighbor search, n_neighbors = 30
## 15:45:26 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:45:28 Writing NN index file to temp file C:\Users\CRISNI~1\AppData\Local\Temp\RtmpsVPW52\file2ae84467b39
## 15:45:28 Searching Annoy index using 1 thread, search_k = 3000
## 15:45:32 Annoy recall = 100%
## 15:45:33 Commencing smooth kNN distance calibration using 1 thread
## 15:45:34 Initializing from normalized Laplacian + noise
## 15:45:34 Commencing optimization for 200 epochs, with 484180 positive edges
## 15:45:47 Optimization finished
DimPlot(all.norm, reduction = "umap", label = TRUE)

# Run non-linear dimensional reduction with tSNE (UMAP preferred, so this section is edited/commented out) #OS1.15 <-RunTSNE(object = OS1.norm, dims = 1:15, do.fast = TRUE) TSNEPlot(object = OS1.15, do.label = TRUE)

#saveRDS(all.norm, file = #"/Users/deniseniell/Desktop/Seurat/run2/OSr2PCs200res50.rds")

Build a phylogenetic tree, and rename/reorder cluster names according to their position on the tree

See help for details on tree building strategy

Assigned cluster will be placed in the ‘tree.ident’ field of , and also stored in

all.normTREE <- BuildClusterTree(all.norm, reorder = TRUE, reorder.numeric = TRUE, slot = "scale.data", verbose = TRUE, dims=1:20)
## Reordering identity classes and rebuilding tree
PlotClusterTree(all.normTREE)

Run non-linear dimensional reduction (UMAP/tSNE)

Non-linear dimensional reduction techniques, such as tSNE and UMAP, allows one to visualize and explore the datasets. The goal of these algorithms is to learn the underlying manifold of the data in order to place similar cells together in a low-dimensional space. Cells within the graph-based clusters determined above should co-localize on these dimension reduction plots. Seurat suggests using the same PCs as what was provided as input to the clustering analysis.

# don't need to recalculate, just replot
#all.normTREE <- RunUMAP(all.normTREE, dims = 1:50)
DimPlot(all.normTREE, reduction = "umap", label = TRUE)

Finding differentially expressed features (for 6 clusters, ~15m)

#FindMarkers will find markers between two different identity groups - you have to specify both identity groups. This is useful for comparing the differences between two specific groups.

#FindAllMarkers will find markers differentially expressed in each identity group by comparing it to all of the others - you don’t have to manually define anything. Note that markers may bleed over between closely-related groups - they are not forced to be specific to only one group. This is what most people use (and likely what you want).

cluster.markers <- FindAllMarkers(all.normTREE, min.pct = 0.5, logfc.threshold = 0.5)
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
## Calculating cluster 22
#write.csv(cluster.markers, file = "/Users/deniseniell/Desktop/Seurat/run2/clustermarkers_OSr2PCs200res50.csv")
#saveRDS(all.normTREE, file = #"/Users/deniseniell/Desktop/Seurat/run2/OSmarkersTree.rds")

Visualizing top 10 markers

top10 <- cluster.markers %>% group_by(cluster) %>% top_n(n = 5,wt = avg_logFC )
#top10 <- cluster.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
#DoHeatmap(all.normTREE, features = top10$gene) + NoLegend() + theme(axis.text.y = element_text(size = 5))
DotPlot(all.normTREE,features=rev(unique(top10[1:50,]$gene))) + RotatedAxis()+ theme(axis.text.x = element_text(size = 7))

DotPlot(all.normTREE,features=rev(unique(top10[51:100,]$gene))) + RotatedAxis()+ theme(axis.text.x = element_text(size = 7))

DotPlot(all.normTREE,features=rev(unique(top10[101:159,]$gene))) + RotatedAxis()+ theme(axis.text.x = element_text(size = 7))
## Warning: Factor `cluster` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning in FetchData(object = object, vars = features): The following requested
## variables were not found: NA

get markers for each cluster separated out

cluster_markers <- list()

nclust = nlevels(Idents(all.normTREE))
for(i in 1:nclust) {
cluster_markers[[i]] <- cluster.markers[which(cluster.markers$cluster == i),]
   #cluster_markers[[i]] <- FindMarkers(
    # all.normTREE, ident.1 = i, min.pct=0.5, logfc.threshold = 0.5)
}

heatmap for individual clusters

for (i in 1:nclust){
  these <- cluster_markers[[i]]
  these <-these[which(these$avg_logFC>0.5),]
  these <- these[order(these$avg_logFC, decreasing = TRUE),]
  
   print(DoHeatmap(all.normTREE, features = head(these$gene,40)) + NoLegend() + theme(axis.text.y = element_text(size = 6)))

   #print(head(these,40))
}

### get markers for pairwise discrimination

#pair.markers <- FindMarkers(all.normTREE, ident.1 = 24, ident.2=25,min.pct = 0.5, logfc.threshold = 0.5)

#   DoHeatmap(all.normTREE, features = rownames(pair.markers)) + NoLegend() + theme(axis.text.y = element_text(size = 8))

get markers of groups

#pair.markers <- FindMarkers(all.normTREE, ident.1 = c(15,16,17, 18, 19),min.pct = 0.5, logfc.threshold = 0.5)

#   DoHeatmap(all.normTREE, features = rownames(pair.markers)) + NoLegend() + theme(axis.text.y = element_text(size = 8))

#tree maps————————————————————————–

Grab possible nodes using FindMarkers from seurat object, assuming you’ve already run BuildClusterTree()

nodes <- unique(all.normTREE@tools$BuildClusterTree$edge[,1])

tree_markers <- list()
for(i in 1:length(nodes)) {
#for(i in 1:2) {
   tree_markers[[i]] <- FindMarkers(
     all.normTREE, ident.1 = "clustertree", ident.2 = nodes[i], min.pct=0.5)
}

selecting markers that are greater than log2, sorting them and taking 10 most neg (left?) and 10 most pos (right?)

goodmarkers <- list()
leftMarkers <- list()
rightMarkers <- list()
for(i in 1:length(nodes)){
  these <- tree_markers[[i]]
  these <-these[which(abs(these$avg_logFC)>0.5),]
  these <- these[order(these$avg_logFC, decreasing = TRUE),]
  these$node_id <- nodes[[i]]
  these$gene_id<-rownames(these)
  leftMarkers[[i]] <- head(these,3)
  rightMarkers[[i]] <- tail(these,3)
  goodmarkers[[i]] <- these

}

#save out goodmarkers[[i]] and nodes to csv file

#rbind.fill(goodmarkers)
#df_goodtable <- ldply(goodmarkers, data.frame)
#capture.output(summary(df_goodtable), file = "/Users/deniseniell/Desktop/Seurat/run2/R2goodtable")
#write.csv(df_goodtable, file = "/Users/deniseniell/Desktop/Seurat/run2/R2goodmark_list.csv")

concatenate left and right markers for all nodes

allmarkers <- rownames(goodmarkers[[1]])
leftRightMarkers <- c(rownames(leftMarkers[[1]]),rownames(rightMarkers[[1]]))
for(i in 2:length(nodes)){
  allmarkers <- c(allmarkers,rownames(goodmarkers[[i]]))
  leftRightMarkers <- c(leftRightMarkers,rownames(leftMarkers[[i]]),rownames(rightMarkers[[i]]))
  
}

#heatmap for each node individually

for(i in 1:length(tree_markers)){
  i
  these <- tree_markers[[i]]
  these <-these[which(abs(these$avg_logFC)>0.5),]
  these <- these[order(these$avg_logFC, decreasing = TRUE),]
  these <- rbind(head(these,10),tail(these,10))
#print(these)

print(DoHeatmap(all.normTREE, features = rownames(these))+ NoLegend() + theme(axis.text.y = element_text(size = 6)))
  
}

## Warning in DoHeatmap(all.normTREE, features = rownames(these)): The following
## features were omitted as they were not found in the scale.data slot for
## the RNA assay: No hit squid and invert hits-Ocbimv22014608m1, LICD protein
## family-Ocbimv22030344m1, ATP citrate synthase-Ocbimv22001106m1, CUB-domain-
## CUB-domain-containing-protein-2-Ocbimv22012935m1, gene:Ocbimv22001092m.g1,
## gene:Ocbimv22022323m.g1, Unknown function-Ocbimv22030504m1, Kazal-type-serine-
## protease-inhibitor-domain-Kazal-type-serin-Ocbimv22017255m1

## Warning in DoHeatmap(all.normTREE, features = rownames(these)): The following
## features were omitted as they were not found in the scale.data slot for the RNA
## assay: Caprin family member-Ocbimv22030377m1, Unknown function-Ocbimv22030969m1,
## Nucleoside H+ symporter/Major Facilititator superfamily-Ocbimv22011289m1,
## lactate/malate-dehydrogenase-NAD-binding-domain-lactate/mala-Ocbimv22001700m1,
## ATP citrate synthase-Ocbimv22001106m1, AMP-binding-enzyme--acyl-CoA-synthetase-
## short-chain-family-m-Ocbimv22017857m1, gene:Ocbimv22017855m.g1, AchE?! Confirm-
## Ocbimv22023426m1

## Warning in DoHeatmap(all.normTREE, features = rownames(these)): The following
## features were omitted as they were not found in the scale.data slot for the
## RNA assay: Fibronectin-Ocbimv22039573m1, No hits-Ocbimv22024117m1, Cadherin-
## like-Cadherin-domain-Cadherin-domain-Cadherin-domai-Ocbimv22023977m1, No
## hits Squid hit-Ocbimv22011913m1, Cadherin-Ocbimv22003826m1, RHO-protein-GDP-
## dissociation-inhibitor-Rho-GDP-dissociation--Ocbimv22014986m1, No hit squid hit-
## Ocbimv22009888m1, VMAT A-Ocbimv22031489m1

## Warning in DoHeatmap(all.normTREE, features = rownames(these)): The following
## features were omitted as they were not found in the scale.data slot for
## the RNA assay: Kazal-type-serine-protease-inhibitor-domain-Kazal-type-
## serin-Ocbimv22017255m1, Phosphoglycerate-kinase-phosphoglycerate-kinase-1-
## Ocbimv22014453m1

#dotplot for one node individually

i<-14
  i
## [1] 14
  these <- tree_markers[[i]]
  these <-these[which(abs(these$avg_logFC)>0),]
  these <- these[order(these$avg_logFC, decreasing = TRUE),]
  these <- rbind(head(these,10),tail(these,10))

DotPlot(all.normTREE,features=rownames(these))+ RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

#Heatmap of all concatenated node data

DoHeatmap(all.normTREE, features = leftRightMarkers) + NoLegend() + theme(axis.text.y = element_text(size = 4))

map developmental genes

genelist <- vector()
nomatch <- list()
for (i in 1:129){
  gene <- ref[[i,1]]
  gene<-substr(gene,7,str_length(gene)-1)
  loc <- grep(gene,all.genes)
  if (length(loc)>0) {
    genelist <- c(genelist,loc)
  } else {
    nomatch <- c(nomatch,ref[[i,2]])
  }
}
DoHeatmap(all.normTREE, features = all.genes[genelist],disp.min = -1.5, disp.max = 1.5)  + theme(axis.text.y = element_text(size = 4))

map neural genes

genelist <- vector();
nomatch <- list();
for (i in 130:207){
  gene <- ref[[i,1]]
  gene<-substr(gene,7,str_length(gene)-1)
  loc <- grep(gene,all.genes)
  if (length(loc)>0) {
    genelist <- c(genelist,loc)
  } else {
    nomatch <- c(nomatch,ref[[i,2]])
  }
}
DoHeatmap(all.normTREE, features = all.genes[genelist], disp.min = -1.5, disp.max = 1.5)  + theme(axis.text.y = element_text(size = 5))

#map cadherins

DoHeatmap(all.normTREE, features = all.genes[grep("Cadherin-O",all.genes)], disp.min = -1.5, disp.max = 1.5)  + theme(axis.text.y = element_text(size = 8))

#heatmap of yfg (your favorite gene)

yfg <- read.csv("D:/data/octo seq/Genes for in situ.csv",stringsAsFactors=FALSE)

genelist <- vector()
nomatch <- list()
for (i in 1:22){
  gene <- yfg[[i,2]]
  #gene<-substr(gene,7,str_length(gene)-1)
  loc <- grep(gene,all.genes)
  if (length(loc)>0) {
    genelist <- c(genelist,loc)
  } else {
    nomatch <- c(nomatch,yfg[[i,2]])
  }
}
DoHeatmap(all.normTREE, features = all.genes[genelist],disp.min = -1.5, disp.max = 1.5)  + theme(axis.text.y = element_text(size = 8))

DotPlot(all.normTREE,features=rev(all.genes[genelist])) + RotatedAxis()

for (i in 1:length(genelist)){
  

FeaturePlot(all.normTREE,features = all.genes[genelist[i]], ncol = 1) + NoLegend() + NoAxes()
}
#heatmap of yfg (your favorite gene)

yfg <- read.csv("D:/data/octo seq/GeneIDs - More Neuro.csv",stringsAsFactors=FALSE)
yfg.unique = unique(yfg[,2])
genelist <- vector()
nomatch <- list()
for (i in 1:length(yfg.unique)){
  gene <- yfg.unique[i]
#gene<-substr(gene,1,str_length(gene)-1)
  loc <- grep(gene,all.genes)
  if (length(loc)>0) {
    genelist <- c(genelist,loc)
  } else {
    nomatch <- c(nomatch,yfg[[i,2]])
  }
}
DotPlot(all.normTREE,features=rev(all.genes[genelist[1:100]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=rev(all.genes[genelist[101:200]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=rev(all.genes[genelist[201:270]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

#heatmap of gpcrs

yfg <- read.csv("D:/data/octo seq/GeneIDs - All GPCRs.csv",stringsAsFactors=FALSE)

genelist <- vector()
nomatch <- list()
for (i in 1:327){
  gene <- yfg[[i,2]]
#gene<-substr(gene,1,str_length(gene)-1)
  loc <- grep(gene,all.genes)
  if (length(loc)>0) {
    genelist <- c(genelist,loc)
  } else {
    nomatch <- c(nomatch,yfg[[i,2]])
  }
}
DotPlot(all.normTREE,features=rev(all.genes[genelist[1:50]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=rev(all.genes[genelist[51:100]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=rev(all.genes[genelist[101:150]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=rev(all.genes[genelist[151:200]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=rev(all.genes[genelist[201:243]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

zinc <- grep("c2h2",all.genes,ignore.case=TRUE)
DotPlot(all.normTREE,features=rev(all.genes[zinc[1:500]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 4))

DotPlot(all.normTREE,features=rev(all.genes[zinc[501:1000]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 4))

DotPlot(all.normTREE,features=rev(all.genes[zinc[1001:1500]])) + RotatedAxis()+ theme(axis.text.x = element_text(size = 4))

#krab c2h2 zinc fingers are noted in albertin et al. probably not getting them all
DotPlot(all.normTREE,features=all.genes[grep("krab",all.genes,ignore.case=TRUE)]) + RotatedAxis()+ theme(axis.text.x = element_text(size = 4))

DotPlot(all.normTREE,features=all.genes[grep("hox",all.genes,ignore.case=TRUE)]) + RotatedAxis()+ theme(axis.text.x = element_text(size = 4))

cadh <-grep("cadherin",all.genes,ignore.case=TRUE)


DotPlot(all.normTREE,features=all.genes[cadh[1:50]]) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=all.genes[cadh[51:100]]) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=all.genes[cadh[101:150]]) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))

DotPlot(all.normTREE,features=all.genes[cadh[151:193]]) + RotatedAxis()+ theme(axis.text.x = element_text(size = 6))
## Warning in FetchData(object = object, vars = features): The following requested
## variables were not found: NA

FeaturePlot(all.normTREE,features = all.genes[grep("protocadherin",all.genes,ignore.case=TRUE)])

print(nomatch)
## [[1]]
## [1] "Ocbimv22001825m"
## 
## [[2]]
## [1] "Ocbimv22002970m"
## 
## [[3]]
## [1] "Ocbimv22002972m"
## 
## [[4]]
## [1] "Ocbimv22002973m"
## 
## [[5]]
## [1] "Ocbimv22004108m"
## 
## [[6]]
## [1] "Ocbimv22006261m"
## 
## [[7]]
## [1] "Ocbimv22006262m"
## 
## [[8]]
## [1] "Ocbimv22006266m"
## 
## [[9]]
## [1] "Ocbimv22006948m"
## 
## [[10]]
## [1] "Ocbimv22009549m"
## 
## [[11]]
## [1] "Ocbimv22009853m"
## 
## [[12]]
## [1] "Ocbimv22009966m"
## 
## [[13]]
## [1] "Ocbimv22010055m"
## 
## [[14]]
## [1] "Ocbimv22011033m"
## 
## [[15]]
## [1] "Ocbimv22011096m"
## 
## [[16]]
## [1] "Ocbimv22011212m"
## 
## [[17]]
## [1] "Ocbimv22011397m"
## 
## [[18]]
## [1] "Ocbimv22011400m"
## 
## [[19]]
## [1] "Ocbimv22011405m"
## 
## [[20]]
## [1] "Ocbimv22011406m"
## 
## [[21]]
## [1] "Ocbimv22011776m"
## 
## [[22]]
## [1] "Ocbimv22012294m"
## 
## [[23]]
## [1] "Ocbimv22012937m"
## 
## [[24]]
## [1] "Ocbimv22013682m"
## 
## [[25]]
## [1] "Ocbimv22013759m"
## 
## [[26]]
## [1] "Ocbimv22013760m"
## 
## [[27]]
## [1] "Ocbimv22016114m"
## 
## [[28]]
## [1] "Ocbimv22016116m"
## 
## [[29]]
## [1] "Ocbimv22016427m"
## 
## [[30]]
## [1] "Ocbimv22019232m"
## 
## [[31]]
## [1] "Ocbimv22019923m"
## 
## [[32]]
## [1] "Ocbimv22020068m"
## 
## [[33]]
## [1] "Ocbimv22020136m"
## 
## [[34]]
## [1] "Ocbimv22020457m"
## 
## [[35]]
## [1] "Ocbimv22021289m"
## 
## [[36]]
## [1] "Ocbimv22021291m"
## 
## [[37]]
## [1] "Ocbimv22021509m"
## 
## [[38]]
## [1] "Ocbimv22022047m"
## 
## [[39]]
## [1] "Ocbimv22022873m"
## 
## [[40]]
## [1] "Ocbimv22024696m"
## 
## [[41]]
## [1] "Ocbimv22024958m"
## 
## [[42]]
## [1] "Ocbimv22025444m"
## 
## [[43]]
## [1] "Ocbimv22025567m"
## 
## [[44]]
## [1] "Ocbimv22025569m"
## 
## [[45]]
## [1] "Ocbimv22026028m"
## 
## [[46]]
## [1] "Ocbimv22026939m"
## 
## [[47]]
## [1] "Ocbimv22027432m"
## 
## [[48]]
## [1] "Ocbimv22027591m"
## 
## [[49]]
## [1] "Ocbimv22027699m"
## 
## [[50]]
## [1] "Ocbimv22029288m"
## 
## [[51]]
## [1] "Ocbimv22029408m"
## 
## [[52]]
## [1] "Ocbimv22029503m"
## 
## [[53]]
## [1] "Ocbimv22030827m"
## 
## [[54]]
## [1] "Ocbimv22030828m"
## 
## [[55]]
## [1] "Ocbimv22030882m"
## 
## [[56]]
## [1] "Ocbimv22031366m"
## 
## [[57]]
## [1] "Ocbimv22031367m"
## 
## [[58]]
## [1] "Ocbimv22032407m"
## 
## [[59]]
## [1] "Ocbimv22032659m"
## 
## [[60]]
## [1] "Ocbimv22033043m"
## 
## [[61]]
## [1] "Ocbimv22033849m"
## 
## [[62]]
## [1] "Ocbimv22034321m"
## 
## [[63]]
## [1] "Ocbimv22034322m"
## 
## [[64]]
## [1] "Ocbimv22034324m"
## 
## [[65]]
## [1] "Ocbimv22034325m"
## 
## [[66]]
## [1] "Ocbimv22034326m"
## 
## [[67]]
## [1] "Ocbimv22034328m"
## 
## [[68]]
## [1] "Ocbimv22034329m"
## 
## [[69]]
## [1] "Ocbimv22034330m"
## 
## [[70]]
## [1] "Ocbimv22034332m"
## 
## [[71]]
## [1] "Ocbimv22034333m"
## 
## [[72]]
## [1] "Ocbimv22034334m"
## 
## [[73]]
## [1] "Ocbimv22034590m"
## 
## [[74]]
## [1] "Ocbimv22036037m"
## 
## [[75]]
## [1] "Ocbimv22036485m"
## 
## [[76]]
## [1] "Ocbimv22036695m"
## 
## [[77]]
## [1] "Ocbimv22036699m"
## 
## [[78]]
## [1] "Ocbimv22037101m"
## 
## [[79]]
## [1] "Ocbimv22037715m"
## 
## [[80]]
## [1] "Ocbimv22038378m"
## 
## [[81]]
## [1] "Ocbimv22038381m"
## 
## [[82]]
## [1] "Ocbimv22038642m"
## 
## [[83]]
## [1] "Ocbimv22039182m"
## 
## [[84]]
## [1] "Ocbimv22039241m"
FeaturePlot(all.normTREE,features = all.genes[genelist[13:22]]) + NoLegend() + NoAxes()

FeaturePlot(all.normTREE,features = all.genes[grep("Synaptotagmin",all.genes)],max.cutoff = 10)

FeaturePlot(all.normTREE,features = all.genes[grep("021175",all.genes)]) + 
scale_color_gradientn( colours = c('lightgrey', 'blue'),  limits = c(0, 8))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

FeaturePlot(all.normTREE,features = all.genes[grep("VACHT",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("VGlut",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("TH-O",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("TyrBH",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("FMRF amide",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("FMRF related",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("000748",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("25965",all.genes)])

FeaturePlot(all.normTREE,features = all.genes[grep("glutsyn",all.genes,ignore.case=TRUE)])